# Load Python libraries
import io
import numpy as np
import pandas as pd
from collections import Counter
from PIL import Image
# Load Plot libraries
import seaborn as sns
import matplotlib.pyplot as plt
# Load an example image; the path is relative to the current working directory
file_path = "../data/img/example-1.png"
img = Image.open(file_path)
# Show image dimensions (resolution) in pixels
img.size
# Show the image file format reported by PIL (e.g. the container type, not the file name extension)
img.format
# Show image (a bare expression like this only renders in an interactive/notebook session)
img
# Read the file at a low level: its raw contents as a mutable sequence of bytes
with open(file_path, 'rb') as f:
    low_byte_list = bytearray(f.read())
# Show size (KB): 1 KB is 1024 bytes
round(len(low_byte_list) / 1024, 2)
# Show size (MB): 1 MB is 1024 * 1024 bytes (the divisor was previously 1024 * 1020)
round(len(low_byte_list) / (1024 * 1024), 2)
# Create a zero-filled matrix that will hold one byte value per cell
row_len = 2232
col_len = 1252
matrix = np.zeros((row_len, col_len))
matrix.shape
# Calculate the number of spare cells left once every byte has been stored
gap = np.prod(matrix.shape) - len(low_byte_list)
gap
# A negative gap means the file holds more bytes than the matrix has cells;
# fail early with a clear message instead of an IndexError halfway through
if gap < 0:
    raise ValueError(
        f"File has {len(low_byte_list)} bytes but the matrix only has "
        f"{row_len * col_len} cells; increase row_len/col_len"
    )
# Save bytes into the matrix in row-major order (byte i goes to row i // col_len,
# column i % col_len). np.zeros returns a contiguous array, so reshape(-1) is a
# view and the slice assignment writes straight into `matrix`; the trailing
# `gap` cells keep their initial value of zero.
data = np.array(low_byte_list)
matrix.reshape(-1)[:len(data)] = data
# Draw the byte matrix as a heatmap, one cell per stored byte value
fig, ax = plt.subplots(figsize=(14, 14))
sns.heatmap(data=matrix, ax=ax)
# Axis labels first, then the title
ax.set_ylabel('rows', fontsize=12)
ax.set_xlabel('columns', fontsize=12)
ax.set_title("Bytes of the Image", fontsize=16)
plt.show()
# Count how many times each byte value occurs in the file
term_freq = Counter(low_byte_list)
n = len(term_freq)
n
# Turn the raw counts into relative frequencies (each count divided by the total)
N = sum(term_freq.values())
for term in list(term_freq):
    term_freq[term] /= N
# Build a dataframe of (byte, frequency) pairs, most frequent first
df = pd.DataFrame.from_records(
    term_freq.most_common(n),
    columns=['Byte', 'Frequency'],
)
df.head(10)
# Give every possible byte value (0-255) its own row, with frequency 0.0 for
# values that never occur in the file. Without this the bar plot only has as
# many categories as there are distinct bytes, and the 256 tick labels below
# would not line up with the bars (or would be rejected for a count mismatch).
byte_freq = (
    df.set_index('Byte')['Frequency']
    .reindex(range(256), fill_value=0.0)
    .rename_axis('Byte')
    .reset_index()
)
# Create pretty x axis labels: only every fifth byte value gets a visible label
x_labels = [str(ix) if ix % 5 == 0 else '' for ix in range(256)]
# Plot the frequency of the bytes in the file
fig = plt.figure(figsize=(18, 6))
ax = sns.barplot(x='Byte', y='Frequency', data=byte_freq, palette="Blues_d")
# Pin one tick per bar before assigning the labels so positions and labels match
ax.set_xticks(np.arange(256))
ax.set_xticklabels(labels=x_labels, fontsize=10, rotation=50)
plt.title('Bytes Frequency of the Image')
plt.show()
# Create a new image
# file_path = "../data/img/example-2.png"
# new_img = Image.open(io.BytesIO(low_byte_list))
# new_img.save(file_path)